{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "f01f0c62",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "65c16a57",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use the SimHei font so the Chinese axis labels and titles render correctly.\n",
    "plt.rcParams['font.sans-serif'] = ['SimHei']\n",
    "# Draw negative numbers with the ASCII hyphen rather than the Unicode minus\n",
    "# sign (U+2212); CJK fonts such as SimHei typically lack that glyph, which\n",
    "# makes negative tick labels show up as empty boxes.\n",
    "plt.rcParams['axes.unicode_minus'] = False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "56434078",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.neighbors import KNeighborsClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "1a3dfec2",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "015f5fd7",
   "metadata": {},
   "source": [
    "# 基于KNN的电影二分类预测"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0caac8bb",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## （1）导入数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "5f561be5",
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>打斗镜头数量</th>\n",
       "      <th>接吻镜头数量</th>\n",
       "      <th>类别</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>电影编号</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>150</td>\n",
       "      <td>10</td>\n",
       "      <td>动作片</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>80</td>\n",
       "      <td>30</td>\n",
       "      <td>动作片</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5</td>\n",
       "      <td>120</td>\n",
       "      <td>爱情片</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>100</td>\n",
       "      <td>5</td>\n",
       "      <td>动作片</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>20</td>\n",
       "      <td>80</td>\n",
       "      <td>爱情片</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>50</td>\n",
       "      <td>50</td>\n",
       "      <td>爱情片</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>120</td>\n",
       "      <td>10</td>\n",
       "      <td>动作片</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>20</td>\n",
       "      <td>150</td>\n",
       "      <td>爱情片</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>70</td>\n",
       "      <td>30</td>\n",
       "      <td>动作片</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>40</td>\n",
       "      <td>60</td>\n",
       "      <td>爱情片</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>180</td>\n",
       "      <td>5</td>\n",
       "      <td>动作片</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>10</td>\n",
       "      <td>140</td>\n",
       "      <td>爱情片</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>90</td>\n",
       "      <td>20</td>\n",
       "      <td>动作片</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>60</td>\n",
       "      <td>90</td>\n",
       "      <td>爱情片</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>10</td>\n",
       "      <td>180</td>\n",
       "      <td>爱情片</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>150</td>\n",
       "      <td>5</td>\n",
       "      <td>动作片</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>30</td>\n",
       "      <td>100</td>\n",
       "      <td>爱情片</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>50</td>\n",
       "      <td>50</td>\n",
       "      <td>动作片</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>20</td>\n",
       "      <td>160</td>\n",
       "      <td>爱情片</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>130</td>\n",
       "      <td>10</td>\n",
       "      <td>动作片</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      打斗镜头数量  接吻镜头数量   类别\n",
       "电影编号                     \n",
       "1        150      10  动作片\n",
       "2         80      30  动作片\n",
       "3          5     120  爱情片\n",
       "4        100       5  动作片\n",
       "5         20      80  爱情片\n",
       "6         50      50  爱情片\n",
       "7        120      10  动作片\n",
       "8         20     150  爱情片\n",
       "9         70      30  动作片\n",
       "10        40      60  爱情片\n",
       "11       180       5  动作片\n",
       "12        10     140  爱情片\n",
       "13        90      20  动作片\n",
       "14        60      90  爱情片\n",
       "15        10     180  爱情片\n",
       "16       150       5  动作片\n",
       "17        30     100  爱情片\n",
       "18        50      50  动作片\n",
       "19        20     160  爱情片\n",
       "20       130      10  动作片"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the movie table and index it by movie id in a single chained\n",
    "# expression, so the frame is never mutated in place after it is created.\n",
    "movie = pd.read_csv('../dataset/movie.csv').set_index('电影编号')\n",
    "movie"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "48ece71d",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## （2）划分数据集"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "91bdc7f4",
   "metadata": {
    "hidden": true
   },
   "source": [
    "X中的第一个维度表示样本数量，第二个维度表示特征数。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "a38a3c18",
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[150,  10],\n",
       "       [ 80,  30],\n",
       "       [  5, 120],\n",
       "       [100,   5],\n",
       "       [ 20,  80],\n",
       "       [ 50,  50],\n",
       "       [120,  10],\n",
       "       [ 20, 150],\n",
       "       [ 70,  30],\n",
       "       [ 40,  60],\n",
       "       [180,   5],\n",
       "       [ 10, 140],\n",
       "       [ 90,  20],\n",
       "       [ 60,  90],\n",
       "       [ 10, 180],\n",
       "       [150,   5],\n",
       "       [ 30, 100],\n",
       "       [ 50,  50],\n",
       "       [ 20, 160],\n",
       "       [130,  10]], dtype=int64)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Feature matrix: one row per movie, one column per selected feature.\n",
    "feature_columns = ['打斗镜头数量', '接吻镜头数量']\n",
    "x = movie[feature_columns].to_numpy()\n",
    "x"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c200a8c1",
   "metadata": {
    "hidden": true
   },
   "source": [
    "dataframe可以直接参与训练，但是训练数据一定得是2维的。一般推荐使用ndarray的格式进行训练。"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ed82c0f6",
   "metadata": {
    "hidden": true
   },
   "source": [
    "有监督学习，必须要有标记结果，一个训练样本必须对应一个标签。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "7c8231aa",
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['动作片', '动作片', '爱情片', '动作片', '爱情片', '爱情片', '动作片', '爱情片', '动作片',\n",
       "       '爱情片', '动作片', '爱情片', '动作片', '爱情片', '爱情片', '动作片', '爱情片', '动作片',\n",
       "       '爱情片', '动作片'], dtype=object)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Label vector: the class of each movie, in the same row order as the movie table.\n",
    "y = movie['类别'].to_numpy()\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "a363178e",
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:xlabel='打斗镜头数量', ylabel='接吻镜头数量'>"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYEAAAEECAYAAADOJIhPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAg6UlEQVR4nO3de7hc89338fd3H3LYkZNkCzkJFYJKiq2RSGpTnqDpIR6anpRyN0r1Rh8trV6tBPdDn7b0RrmjtGkq2hBFVbgcIocmwY4qQRwaQiLYIrJFznt/nz9+K92TySR7Jtkza2bW53Vdc+11mFnzyTLmO2v91vr9zN0REZFkqog7gIiIxEdFQEQkwVQEREQSTEVARCTBVARERBJMRUBEJMGq4g6Qi969e/ugQYPijiEiUlIWLVr0vrvXZlpXUkVg0KBBNDQ0xB1DRKSkmNmyHa3T6SARkQRTERARSTAVARGRBFMREBFJsJJqGM6X5s2wchG0bIG+dVDVKe5EIiKFkfgisGkt/G40fPAaYFDTG85ZAHv0iTuZiEj+Jf500OxJ0PhSKAabPoKm5fDQhXGnEhEpjMQXgcYXoHlj63zLZmh8Mb48IiKFlPgi0G84VHVuna/sCP0+HV8eEZFCSnwRGHUZDKoPjcFVnaHPUBjzq7hTiYgURl4bhs2sD3C3u482s4nAsdGqvYEpwB+AJ4HXouWnu3tjPjOlq+wAX/tbaAto2QI99gVLfGkUkaTIWxEws56EL/ouAO7+s5R1dxEKwHDgane/OV85smEG3QfEmUBEJB75/M3bDIwHmlIXmtlRwAp3XwEcDZxvZgvM7Lo8ZhERkQzyVgTcvcnd12RYdSFwQzQ9Exjp7iOAA81saPqTzWyCmTWYWUNjY0HPFImIlL2Cnv02sx7AXu7+r2jRfHf/KJpeAgxOf427T3b3Onevq63N2B22iIjsokI3gX4ReDBl/mEz28fMaoAxwOIC5xERSbRCF4ExwJyU+YnALGAhcIu7v1zgPCIiiZb3voPcvT5l+mtp62YBQ/KdQUREMtMV8SIiCaYiICKSYCoCIiIJpiIgIpJgKgIiIgmmIiAikmAqAiIiCaYiICKSYCoCIiIJlvc7hpNiYxM8cQW8txj6j4DRP4aqjnGnEhHZORWBdtC8GW4fBateCYPWvzkPViyErz8UBqwRESlWOh3UDt5+Gj58PRQAgC3rYdmcMGSliEgxUxFoB94CpP/iN/DmONKIiGRPRaAd9K2DLrVQUR3mqzrB3sOg+77x5hIRaYuKQDuo6gTnLIRDT4e9D4dhZ8IZj6g9QESKnxqG20mXWjj1jrhTiIjkRkcCIiIJpiIgIpJgKgIiIgmmIiAikmBqGG5Hy+bC6n/BXodB3yPjTiMi0ra8FgEz6wPc7e6jzawf8CTwWrT6dHdvNLPbgIOBB939qnzmyaeHLoRnbotmHI67CkZcHGskEZE25e10kJn1BKYAXaJFw4Gr3b0+ejSa2alApbuPBPqa2eB85cmnxhdh0a2w+ePosQ4e+xGsXx13MhGRnctnm0AzMB5oiuaPBs43swVmdl20rB6YHk0/DoxK34iZTTCzBjNraGxszGPcXffRSqjssO2yympYV5xxRUT+LW9FwN2b3H1NyqKZwEh3HwEcaGZDCUcJK6L1TUCfDNuZ7O517l5XW1ubr7i7pc9h2/cTVNlJ3UaISPEr5NVB8939o2h6CTAYWAt0jpbtUeA87abLXvCV+6BTD7BK6NoXvvmIxhMQkeJXyKuDHjazrwJrgDHAZMKRwChgITAMeLmAedrVfsfDDz8I7QEdurT9fBGRYlDIIjARmAVsAm5x95fNbCUw18z6AicT2g1KlpkKgIiUlrwXAXevj/7OAoakrWsys3rgRODnaW0IIiKSZ7HfLObuq2m9QkhERAqoJBtiRUSkfagIRDZ9DBs/avt5IiLlJPbTQXFraYb7vgWL7wzz+50A4++B6s47
f52ISDlI/JHAwuvgpRnQsiU8lj0Bj14adyoRkcJIfBF4Y1a4tn+rLRvgjdnx5RERKaTEF4GeB0BFSr8/Vgk994svj4hIISW+CNRfAd37Q4eu4VHTC07+77hTiYgURuIbhjv3hPOeh9dnhTaBQfXQqXvcqURECiPxRQCgugYO/FzcKURECi/xp4NERJJMRUBEJMFUBEREEkxFQEQkwVQEREQSTEVARCTBVARERBJMRUBEJMFUBEREEkxFQEQkwfJaBMysj5nNjaYHmtkTZva4mU22oJ+ZLY+WP2FmtfnMIyIi28pb30Fm1hOYAnSJFp0LnOfuL5nZTOAw4ADgane/OV85RERkx/J5JNAMjAeaANz9cnd/KVrXC3gfOBo438wWmNl1ecwiIiIZ5K0IuHuTu69JX25m44EX3P1tYCYw0t1HAAea2dAMz59gZg1m1tDY2JivuCIiiVTQhmEz2x+4BLgoWjTf3T+KppcAg9Nf4+6T3b3O3etqa9VkICLSngpWBKI2gjuBs1OOEB42s33MrAYYAyxu7/dt3gz/+B3MvhKWPtreWxcRKW2FHFTmMmAgcIOZAfwMmAjMAjYBt7j7y+35hi3N8IfjYeU/YPN6qO4Ex14Bx/ygPd9FRKR0mbvHnSFrdXV13tDQkPXzX3sY7joNNq1tXVZRDZevgwqNqSYiCWFmi9y9LtO6sr5ZbMNqwNIWejgqEBGRMi8CA46B1AOdiiqo/SR07BpfJhGRYlLWRaD7APjGQ9BjvzCY/IBjwryIiARlf2Z84DFw4dK4U4iIFKecjgTMbEyGZUe2XxwRESmkNo8EzKwvoQuIzcCFZvYGUEnoDuJI4ALgxDxmFBGRPMnmdFAD4MA0whf/z4EjgN8BxwLbdQ0hIiKlIZsi8DKhCCwB+gFzgG6EDuBa8hdNRETyLZeGYSdcdT8Y2ItwKqgPUJOHXCIiUgC7c4mopzxERKQE7UoReAV4F3gGeC96iIhICcrmdNCQ6O+hhNNBxwIHAr0JVwmJiEiJyqYIHEno5bOZ0BX0RYQjiI+ApcA38hVORETyq80iEI0ABoCZTXP3JSmrf29m/8xLMhERybusrg6yMADAke7+hwyrP2zXRCIiUjC5NAzfZ2a/MrMLzOwo+HeXEdPyE01ERPItm24jzN3dzF4D/gfYFzjZzG4htBWcnueMIiKSJ9mcDpppZmuB7sABhIbiOuCvhCuGaoHleUsoIiJ5k00ROA0YBHwXmASsBL7o7s1mNgi43cw+66U0TqWIiADZFYHzCUcAq4DngOuB183sDuCTwKUqACIipSmbhuFu0d+RQEdC4fgn8CzhCOGFfARLouZN8Ndz4ee94LoB8ML0uBOJSLnLpgg8ArwOHE7oSXQcMAw4idCt9I939EIz62Nmc6PpajN7wMzmm9nZO1qWZA9dBM9NhfUfQNNyuPdbsGxu3KlEpJxlUwSOJ1wFdCNwEDAVeA1Y6O5TgU+Y2XbbMbOewBSgS7Toe0CDu48ExppZ1x0sS6yX7oEt61vnt6yDV+6PL4+IlL9s7hj+mZl1IVwd1By95ifuPj96ynnunmlcgWZgPHBfNF8PXBZNzydcYZRp2azUjZjZBGACwMCBA7P5N5WsDl3h43db5yuqoWOP2OKISAJkdbOYu3/s7m+7+7uEHkQPSVndYQevaXL31FHHugAroukmwlgEmZalb2eyu9e5e11tbW02cUvWSddBVQ1gUNkBOveCunPjTiUi5SyXQWW2cuBbwG+jUz6PmNlod29q43Vrgc6E4Sj3iOYzLUusA8fCWU/Ay/dDx25w+LegpnfcqUSknOVcBKL7A5qj2V8TTg21VQAAFgGjgLsJDcsLd7As0fodFR4iIoWQTbcRxxHGEm4idB+9Klo+DKh0979m+V5TgAfNbDThdNKThFNB6ctERKRAsjkSGEi4KqiacMqmF/ApYCzwH2292N3ro7/LzOxEwi//n7p7M5BpmYiIFEg2VwdNMbP+wEHu/hiA
mc0D3gImA2dk+2bR2ATT21omIiKFkW1X0tcDG8zsYDM7GGiJxhZYYGYX5C2diIjkVZtFwMz2AZ53978DN9PajQTR/FfNrFvGF0vivfMsPPAdeOBceHtR3GlEJF02p4NWAhPNbDDQADxFdG9ANM7ANGAE8HA+g0rpWfE0TKmHzevC/HN/hDMegQEj40wlIqmyHlnM3V9190sAI1wautVv3V0FQLYz96rWAgBhevak+PKIyPayOR3Uzcw6mlk9gLu3uPud0boxwJl5TSgla9O67ZdtzrBMROKTzZHAN4GjgKPN7EYze8jMHjazG4C/EzqBE9nOkROguqZ1vromLBOR4pHNfQJbgF8QCsZa4FqgP/B/gb6EkcZEtnPo6aEn1HnXgDuMvASGfiPuVCKSaqdFwMwOBX4CnEK4OewEQt9BAEvc/X/nN56UumFnhoeIFKedng5y9xeAK4FzCYPIAAwG9gUGm9mjZqaebkRESlQ2p4M2Ee4M/iJhhLEBwHrgh8BDwKOEzt9ERKTEZHOfwO/M7A+ENoD1wP6EsYXfJvQddL2ZVbv75nwGFRGR9tdWm0BHwpjC+wD9gK8QbhTbE3ga+DbwAxUAEZHS1NaRQBWhDWAQcCrh9M+e0fyfgJnA+/mLV1reWwyrl0LtIbDnAXGnERFp206LgLt/DFxpZtcD33H3u81sT6DG3ZcXImCpmD0J5l0LlVXQvBlOuREOPzvuVCIiO5dttxH7A/PM7FNAbwAz656vUKVm1avhWvgt62BjE2xZDw9+Fzasafu1IiJxynZ4yV8ACwjjAdcQBpfpYWb9gNvc/eY85SsJa5aFgeG3rG9dVlEFH78LnVQqRaSI5TLGcG/CcJCzgbnuvsbMekfziS4CtYdAS1rTuFVCtwHx5BERyVbWvYgSRv+aCwwB/mxmNxPGHj4rD7lKSte+cOodUNU5PDr2gK/9Dao7x51MRGTn2rpE9Djgc4SBZL5CuFJoLnA6cARh8PivZvNGZnYeMD6a7QEsAk4ElkbLvufuz+cWv3gM+RJcuhrWNUKXPlBZHXciEZG2tXUkMJtwWehawp3DnwdeBOYQTg39CLgxmzdy95vdvT4aeH4u8Bvgzq3LSrkAbFXVEbr1VwEQkdLRVt9BLYSbwn5JuEns88A7hG4kDnX3xcBFubxh1JjcBxgOjDOzeWZ2h5nl0j4hIiLtIJs2gQMIdwa/GU2PBi4GPgHg7h/m+J7fJTQkPw0c6+6jgA8JPZVux8wmmFmDmTU0Njbm+FYiIrIz2TYMrwGWpzw2Ae+b2WdzeTMzqwCOc/dZwHPR+MUASwjtDdtx98nuXufudbW1tbm8nYiItCGbIvAWofF26z0C3YE7gC5AFzM7PIf3Gw08GU1PNbNhZlZJ6J/onzlsR0RE2kE2RWATMBCYRSgA04GToxvEXgb+O4f3G0NoVAaYBEwFngUWuPujOWxHRETaQTaNsU64pPNa4FPA7cD+ZjY9Wn5htm/m7j9OmV4MDM0+qoiItLedHglEV/I8CXzg7uOAWwiDy7wI/IBwhY/GGBYRKVFtXSK6AhgBmJndThhisgPhy//3hNHFJuU5o4iI5Ek2bQIVhC/+y4HHgXuBp4Afu/v/EMYWEBGREtRmEXD3VcD3CFfw3AgcD/w/d18QPeUyM7P8RRQRkXxps2HYzCYDGwl3Cw8BTgZmmtlYQqPxU+6+KK8pRUQkL7K5OugXwBZgGKH7iLnAFwinhzoBfzWzae7ueUspIiJ50WYRcPdXogHnJ7v7MjNbAfR09zcBzOxoFQARkdKUVadt7r4R+GM0vYXQgdzWdavzE01ERPItl0FlRESkzKgIlKH3l8Cfx8Hto2Dh9aCTdSKyI+rDv8yseRNuHQ6bPgIc3vkHfPwefPa/4k4mIsVIRwJl5oW7oHkD4eJdYPM6ePqmWCOJSBFTESgzGW/b0618IrIDKgJl5pDToaoTWPRftroGhv9nvJlEpHipTaDMdB8A334aHrsc1r0P
h5wGR50fdyoRKVYqAmWo14Hw5bviTiEipUCng0REEkxFQEQkwVQEREQSTEVARCTB1DAsO/TebU+y7uEGqg4aSL+JY7EK3XAgUm4KVgTMrApYGj0gjFZ2GnAK8KS7X1CoLNK2N758E/3u+iE9acGpYvnt/4v+b92tQiBSZgp5OmgocKe717t7PdARGAV8GlhuZicUMIvsxJamjfS/6/tUs45qNtCBtfR5+2He/c28uKOJSDsrZBE4GhhnZvPM7A7CWMUzogFpHgVGZ3qRmU0wswYza2hsbCxg3OTauKKJ9L4mWqhk87L34gkkInlTyCLwNHCsu48CPgQ6AyuidU1An0wvcvfJ7l7n7nW1tbUFCZp0NQf15uOqfWhJ+XhU0Ez3U4+KMZWI5EMhi8Bz7r4yml4CrCUUAoA9CpxFdsIqDB55jNUdD6aFCtZZb1Zdey/dRgyMO5qItLNCfvFONbNhZlYJjAO6ENoEIAxi/0YBs0gbutfvT68Ni7HNm6lpaWSfH6rJRqQcFfIS0UnANMLJ5vuBq4C5ZvZr4KToIUXGqnSAJlLOClYE3H0x4Qqhf4uuCPoc8Gt3f71QWUREJIj1ZjF3Xw/cHWcGEZEk07G+iEiCqQiIiCSYioCISIKpCIiIJJiKgIhIgqkIiIgkmIqAiEiCqQiIiCSYioCISIKpCIiIJJiKgIhIgqkIiIgkmIqAiEiCqQhI8Xv1Vaivh4ED4ctfhtWrc9/G5s1wySWw334wbBjMmtXuMUVKUaxdSYu06cMPYeRI+OADaGmBd9+F11+Hp54Cs+y3c9FF8Pvfw7p1YX7sWFiwAIYO3dmrRMqejgSkuC1YAJs2hQIAYXrx4lAMcjFtWmsBANiwAe67r/1yipQoFQEpbp07txaArZqboVOn3LbTseO281VVUFOze9lEyoCKgBS3UaNgyJDWL/2aGvjmN6FHj9y2c/XVrV/6VVXQvTuccUa7RhUpRWoTkOJWVQVz5sD114cG4pEj4Zxzct/OOedA374wYwb06gUXXwx77dXucUVKjbl7Yd7IrDvwJ0LhWQuMB14DlkZP+Z67P7+zbdTV1XlDQ0Nec4qIlBszW+TudZnWFfJ00NeBX7n7icA7wGXAne5eHz12WgBERKT9Fex0kLv/JmW2FngLGGdmxwDLgDPdfUuh8oiISAwNw2Y2AugJPAIc6+6jgA+BU3bw/Alm1mBmDY2NjYULKiKSAAUtAma2J3ADcDbwnLuvjFYtAQZneo27T3b3Onevq62tLVBSEZFkKFgRMLMOwHTgR+6+DJhqZsPMrBIYB/yzUFlEdtmqVXDNNXDZZfD3v8edRmS3FfIS0XOAI4HLzexyYBYwFTDgfnd/tIBZRHK3ahUcdlj4u2kT3HADTJkCp50WdzKRXVbIhuGbgZvTFk8s1PuL7Lbbb28tABC6obj4YhUBKWm6Y1gkW2vWhN5IU6X2RyRSglQERLL1+c9v22dR584wblx8eUTagYqASLaGD4c//SmMSdC7d+h76Kab4k4lslvUd5BILr7whfAQKRM6EhARSTAVARGRBFMREBFJMBUBEZEEUxEQSbrXX4d588KNcLuqqSl0o7FkSfvl2lXvvANz58Ly5XEnKQkqAiJJdsUVcMghMHYsDBoEjz2W+zaeey5cNnvKKXDEEXDmmVCgwaq28+c/w/77h3s6Bg+Gm9M7KZB0BRtZrD1oZDGRdvTMMzB69LZ3PXfrBqtXQ0UOvw8POgheeaV1vksXmDq18DfSrVkD++wD69e3LuvcORydDBxY2CxFplhGFhORYvLKK1BZue2yDRtCEcjFsmXbzm/cuG1RKJQVK8KY1Kk6dIClSzM/XwAVAZHkOvRQ2JI2mF+XLtCzZ27bGTwYzFrnO3aET35y9/PlasAAaGnZdtmmTSGf7JCKgEhSHXYYXH11+NLu2hW6d4e//S23U0EAM2ZAnz5hGx07wjnnhPaBQuvaFaZPD4Wsa9fQz9Ott0K/foXPUkLUJiBS
aC0t8Nvfwvz5MGQIXHTRth3TFVpjY7iiZv/9wxforti4EV59NRxFxPml29wM118Pc+ZAXR1cemk4JVTKZswIxblfv9B1+Z575ryJnbUJqAiIFNpZZ8Fdd4UG2U6dYOjQcHll+vlsyd3pp8ODD4Z927kzfPrT8PjjuR/dFItrroErrwz/nurq0PD9/POhAT8HahgWKRarVsGdd7ZekbNhA7z4IixcGG+ucvDWW/DAA637dv16aGiAZ5+NNdYuc4dJk1r/PZs3h8/PX/7Srm+jIiBSSOvXb/+rtKJi28saZddk2reVlaW9b9MHMWppafd/j4qASCH16xfaAbaep66oCKeEhg+PN1c5+MQnwv0A1dVhvrIyNBAffni8uXaVGXzpS9u2F1VWwkkntevbqAiIFJJZuCt37NhwSeNnPgMLFuR8jlcyqKyE2bPh5JPDvj3uuND4XlMTd7JdN3VquAN7333hqKNC+8agQe36FkXRMGxmtwEHAw+6+1U7ep4ahkVEclfUDcNmdipQ6e4jgb5mpjs7REQKJPYiANQD06Ppx4FR8UUREUmWYigCXYAV0XQT0Cd1pZlNMLMGM2tobGwseDgRkXJWDEVgLdA5mt6DtEzuPtnd69y9rra2tuDhRETKWTEUgUW0ngIaBrwRXxQRkWQphvvU7wXmmllf4GTg6HjjiIgkR7FcItoTOBGY4+7v7OR5jcCyHazuDbyfh3j5UEpZobTyllJWKK28pZQVSitvvrPu6+4Zz6cXRRFoD2bWsKPrYItNKWWF0spbSlmhtPKWUlYorbxxZi2GNgEREYmJioCISIKVUxGYHHeAHJRSViitvKWUFUorbyllhdLKG1vWsmkTEBGR3JXTkYCIiORIRUBEJMHKogiY2W1mNt/MfhJ3lnRm1t3MZprZI2b2FzPrYGZvmtkT0eOwuDOmMrOq9HxmNtHMnjazG+POl8rMzkvJ+Wz0OSjKfWtmfcxsbjRdbWYPRJ/Zs3e0rEiyDoz25eNmNtmCfma2PGU/x9qfS1rejNmK5TsiLevElJxLzOxHsexbdy/pB3Aq8Pto+jfA4LgzpeU7Hzgxmr4Z+Clwbdy5dpL3iNR8QB3wGGDAZcAJcWfcQe4bgCOLcd8CPYGHgGei+e8DV0TT9wBdMy0rkqxXAwdH0zOBodH/c+fFvV93kHe7bMXyHZGeNW3dXUC/OPZtORwJ1FPEXVG7+2/c/ZFothbYAowzs3lmdoeZFUPXHamOJiUfcDwww8Mn9VFgdKzpMjCzfoTeZ4dTnPu2GRhP6CUXtv3MzicU2kzL4rBNVne/3N1fitb1ItzVejRwvpktMLPr4on5b+n7NlO2eorjOyI9KwBmdhSwwt1XEMO+LYcisNOuqIuFmY0g/BJ4BDjW3UcBHwKnxJkrg6fZNl9nin//fpdwlJWevSj2rbs3ufualEWZPrNF8TnOkBUAMxsPvODubxOOCEa6+wjgQDMbWuicW2XImylbUe9b4ELCkSzEsG/LoQjstCvqYmBmexL+I58NPOfuK6NVS4BiG0ktPV9R718zqwCOc/dZFP++3SrTPi3a/Wxm+wOXABdFi+a7+0fRdLHt50zZinnf9gD2cvd/RYsKvm+LZmfshqLuitrMOhAORX/k7suAqWY2zMwqgXHAP2MNuL30fF0o4v1LOD31ZDRd7Pt2q0yf2aL8HEedO94JnJ3yK/ZhM9vHzGqAMcDi2AJuL1O2oty3kS8CD6bMF3zfFss5091xL8XdFfU5hAbLy83scmAWMJXQ0Hq/uz8aZ7gMJgHTiPIBVxH276+Bk6JHMRkDzImmt8lehPt2qynAg2Y2GjiEUMRWZFhWDC4DBgI3mBnAz4CJhM/xJuAWd385vnjb2S6bma2keL8jxgC/SJkv+L4tizuGLcuuqGXXmFln4HOEqxqWxp2nHERfSKOAh7f+ws60TNqHviN2rCyKgIiI7JpyaBMQEZFdpCIgIpJgKgIibTCzPcysU8q8mdkn48wk
0l7K4eogkYzM7Hl3Pyy6Fvtqwm35NYS7Xn/q7q9Fl5N2cfem6DVLgGHuvjFlU98HXgXujPr16QD81MyuBKqBP7j7h9Hr+0Y3VGFmZwFvuftjabkmEe5cPRH4CLgJuBs4xd2bU57Xj3B545K0f9pBwFHuvnx39o8I6EhAypCZVUY3ka2P/n4Z2BfYGzgA6EHrdeNHAPPM7IhofkNqATCzbsA3gf8wsz8CzxD6zzkFOAt4FtiQ8vYTzOweMxtA+LJuScu2B+Gu1ZHAXlGeQcDH7t5sZhVRZgiXCe5I807WiWRNRwJSjj4HXA4MAZ4g/FIfa2YnAGPd/SKA6BTPy8CZwI3R+nTfIfSZdA/hWv7TgMOi96ggFJMq4HEzO9Ddr4iu9T+IcMTQlLa97oQ+eC4gFJD5hG4vDjCzOYSi8CXgKcIX/TTgxbRtHMzOC4RI1nQkIGXH3e939+HAi+7+GXf/7Q6eOpzwJbyHux/j7uszbOvnhF5UxwPrgdcJX84nAh8D/d398ejp50Zf5B9EN6p1IZzuSdVM+OX/S0JR2TvKcTlwLjDd3Z8yszMJd+r2B05Ie/QH7jCzk3PbMyLbUxGQshTdHNQjOjXz2UzPcffZhF/2/ye14TdtO72APYGOwH7AAGAGcE30OCB6XiXwA+CHwNabb7b2url1nIZqwlHDREJbwi+B64CVhLvKBwJLo2xTCH31dAIagDWEo4oGQj84F7v7zNz3jMi2dDpIyo6ZfZ3QDtAR+HqmX/hRp377Ebr3ngFcYGZ9gP5m9lfCD6R7Cf2/DyIUgv0IX/B7EY4KIHRRAeFI4UJgc7R9I/RTc3/U3UIl8DvgBUJXHIOj9Z+KtntPtJ3UL/YKQnHoFr2nAQdGr63MeceIZKAiIGXH3e8gnC5ZmKkARD5NGCvh7eixgPCL/GR3/3zqE81sCvAVYBnhXPzPgdcI5/3vit5zGuH8/dbXnEn4Zf+Au/8jbXvTCf3XzCR0z7zZzJ4hdCY2KeWpHwG3A6uAwwlFYRHQm+3bGkR2iYqAlLPqDPMO4O4PEX7lb8PMMvWj0h34NuFX/q3AWe5+vJndA1xqZidF29vatfV5hKt/JgC3mlkL8J/u/kG0vVsI7Qk/AZaZ2X7AoYQCcwThlA+EIlVPaEfYeiQwgFAMGoA3c9kZIpmoTUDKkpk9RMpVNWb2FcL1+LPbeGkHM+uYtux9wrn+lYRG4h9Hy7sCC4FBZtbJzC4A5gGV7v51d//Y3b9G6Ep8tpntndI18xDC0cBEwmmiawjtEzeZ2SeiS1bPILQFrE17NAHfi0akEtkt6kBOEiEaarLZd+MDHxWHTTvaRnS1zoKtN46lreuRckNZlbtviaYNqNh6k5iZ2e5kFMmVioCISILpdJCISIKpCIiIJJiKgIhIgqkIiIgkmIqAiEiC/X+Sic7IX7ltFAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Colour each point by its class label (1 = '动作片', 0 = '爱情片').\n",
    "# The codes are derived from the label column rather than typed by hand, so\n",
    "# they stay aligned with the rows if the dataset is edited or extended.\n",
    "is_action = (movie['类别'] == '动作片').astype(int).to_numpy()\n",
    "movie.plot(kind='scatter', x='打斗镜头数量', y='接吻镜头数量', cmap='rainbow', c=is_action)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4ecc67b4",
   "metadata": {
    "hidden": true
   },
   "source": [
    "对y_train没有维度要求，一般都是一维，甚至不需要必须是数字（字符串在内部会转成数字）。y_train也可以是Series类型（scikit-learn内部会将其转换为ndarray），这里为了与X保持一致，统一使用ndarray。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "a668889b",
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# Hold out 25% of the samples for testing. The shuffle is seeded with a fixed\n",
    "# random_state so the split (and every result computed from it) is the same\n",
    "# on each Restart & Run All.\n",
    "X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.25, shuffle=True, random_state=42)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "da224dc7",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## （3）训练模型"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1933efe3",
   "metadata": {
    "hidden": true
   },
   "source": [
    "n_neighbors = 5 即K值，表示k取多少，默认K=5，大部分时候默认值的效果就可以了。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "d885c442",
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# Spell out K explicitly; 5 is also the library default, so behaviour is unchanged.\n",
    "knn = KNeighborsClassifier(n_neighbors=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "44a4b70e",
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>KNeighborsClassifier()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">KNeighborsClassifier</label><div class=\"sk-toggleable__content\"><pre>KNeighborsClassifier()</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "KNeighborsClassifier()"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fit the classifier on the training split (features X_train, labels y_train).\n",
    "knn.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a212efdc",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## （4）模型预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "b6321e89",
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['爱情片', '爱情片', '爱情片', '动作片', '爱情片'], dtype=object)"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# True labels of the held-out test split, shown for comparison with the predictions.\n",
    "y_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "c94278bf",
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# Predict a class label for every sample in the held-out test split.\n",
    "y_pred = knn.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "9a10c016",
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['爱情片', '爱情片', '爱情片', '爱情片', '爱情片'], dtype=object)"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Predicted labels for the test split; compare element-wise with y_test.\n",
    "y_pred"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8b4925f7",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## （5）模型评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "69d30673",
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Accuracy: the fraction of test samples whose predicted label equals the true label.\n",
    "matches = y_pred == y_test\n",
    "accuracy = np.mean(matches)\n",
    "accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11df1602",
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.7"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {
    "height": "196px",
    "width": "160px"
   },
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
